#' Create exhibits for paper.

# Following packages must be installed:
# data.table, ggplot2, readstata13, textab

library(data.table)
library(ggplot2)
library(textab)
library(readstata13)

# Set this to the replication root directory (the same path as global root in master.do)
setwd("/path/to/replication")

# ===========================================================================
# Table 1: summary statistics (summary_statistics.tex)
# ===========================================================================

ss <- fread("estimates/admin/penalty_summary_stats.csv")
ss <- ss[treated == 0 | weighted == 1]
setkey(ss, male, treated, weighted)

# table header is the same for both tables
tts_head <- TexRow(c("")) / TexRow(1:6, dec = 0, surround = "(%s)") +
  TexRow(c("", "Women", "Men"), cspan = c(1, 3, 3)) +
  TexMidrule(list(c(2, 4), c(5, 7))) +
  TexRow(c("", rep(c("Control,", "Control,", "New"), 2))) +
  TexRow(c("", rep(c("pooled", "weighted", "parents"), 2))) +
  TexMidrule()

tts_owner <-
  TexRow(c("Age")) / TexRow(ss$age, dec = 1) +
  TexRow(c("College education")) / TexRow(ss$college_degree, dec = 3) +
  TexRow(c("Studied business")) / TexRow(ss$business_ed, dec = 3) +
  TexRow(c("Any children")) / TexRow(ss$any_child, dec = 3) +
  TexRow(c("Married/cohabiting (at time of birth)")) / TexRow(ss$married_cohabit, dec = 3)

tts_industry <-
  TexRow(c("Firm age")) / TexRow(ss$firm_age, dec = 1, space = 2) +
  TexRow("\\quad Accommodation and food service") / TexRow(ss$industry_accommodation, dec = 3) +
  TexRow("\\quad Administrative and support service") / TexRow(ss$industry_administrative, dec = 3) +
  TexRow("\\quad Arts, entertainment, and recreation") / TexRow(ss$industry_arts, dec = 3) +
  TexRow("\\quad Construction") / TexRow(ss$industry_construction, dec = 3) +
  TexRow("\\quad Education") / TexRow(ss$industry_education, dec = 3) +
  TexRow("\\quad Human health") / TexRow(ss$industry_health, dec = 3) +
  TexRow("\\quad Information and communication") / TexRow(ss$industry_info, dec = 3) +
  TexRow("\\quad Manufacturing") / TexRow(ss$industry_manufac, dec = 3) +
  TexRow("\\quad Other service") / TexRow(ss$industry_other_service, dec = 3) +
  TexRow("\\quad Professional, scientific, and technical") / TexRow(ss$industry_professional, dec = 3) +
  TexRow("\\quad Real estate") / TexRow(ss$industry_real_estate, dec = 3) +
  TexRow("\\quad Transportation and storage") / TexRow(ss$industry_transport, dec = 3) +
  TexRow("\\quad Wholesale and retail trade") / TexRow(ss$industry_trade, dec = 3, space = 2) +
  TexRow(c("Num. employees (excl. owners)")) / TexRow(ss$emp_excl_owner_mean, dec = 1)

tts_firm_bs <-
  TexRow(c("Profits, mean [median]")) / TexRow(ss$operating_profits_mean / 1000, dec = 1, dollar = F) +
  TexRow(c("")) / TexRow(ss$operating_profits_med / 1000, dec = 1, dollar = F, surround = "[%s]", space = 2) +
  TexRow(c("Revenues")) / TexRow(ss$revenues_mean / 1000, dec = 1, dollar = F) +
  TexRow(c("")) / TexRow(ss$revenues_med / 1000, dec = 1, dollar = F, surround = "[%s]", space = 2) +
  TexRow(c("Costs")) / TexRow(ss$costs_mean / 1000, dec = 1, dollar = F) +
  TexRow(c("")) / TexRow(ss$costs_med / 1000, dec = 1, dollar = F, surround = "[%s]", space = 2) +
  TexRow(c("Owner salaries")) / TexRow(ss$owner_sal_mean / 1000, dec = 1, dollar = F) +
  TexRow(c("")) / TexRow(ss$owner_sal_med / 1000, dec = 1, dollar = F, surround = "[%s]", space = 2)

tts <- tts_head +
  TexRow(c("\\emph{Panel A: Owner demographics}", rep("", 6))) +
  tts_owner +
  TexMidrule() +
  TexRow(c("\\emph{Panel B: Firm characteristics}", rep("", 6))) +
  tts_industry +
  TexMidrule() +
  TexRow(c("\\emph{Panel C: Balance sheets (thousands of USD)}", rep("", 6))) +
  tts_firm_bs +
  TexMidrule() +
  TexRow(c("Number of owners")) / TexRow(ss$n_owners, dec = 0) +
  TexRow(c("Number of firms")) / TexRow(ss$n_firms, dec = 0)
  
TexSave(tab = tts, positions = c("l", rep("c", 6)), 
        filename = "summary_statistics", output_path = "./exhibits")

# ===========================================================================
# Figure 1: profit responses (profits_es_pct.pdf)
# ===========================================================================

dt <- fread("estimates/admin/regression/profit_before_self_trim_ATT_male0.csv")
dt[, sex := "Women"]
dt_agg <- dt[aggregation == "est_pct_agg" & X == "pct_agg_post"]
dt_agg[, event_time := 11.5]
dt <- dt[aggregation == "est_pct"]
dt[, event_time := event_time - 6]

dt2 <- fread("estimates/admin/regression/profit_before_self_trim_ATT_male1.csv")
dt2[, sex := "Men"]
dt2_agg <- dt2[aggregation == "est_pct_agg" & X == "pct_agg_post"]
dt2_agg[, event_time := 11.5]
dt2 <- dt2[aggregation == "est_pct"]
dt2[, event_time := event_time - 6]

dt <- rbind(dt, dt2)
dt_agg <- rbind(dt_agg, dt2_agg)

# construct the labels
dt_agg[, lab := sprintf(
  "'%s:'~ widehat(beta)['post'] == %.2f ~ '('* %.2f *')'",
  sex, round(b, 2), round(se, 2)
)]
legend_labs <- lapply(dt_agg$lab, function(lbl) parse(text = lbl))

ggplot(dt, aes(x = event_time, y = b, color = sex, fill = sex, shape = sex)) +
  geom_point(size = 2, position=position_dodge(width=0.4)) +
  geom_vline(aes(xintercept = 0), linetype = "dotted", color = "red") +
  geom_vline(aes(xintercept = 10.75), linetype = "solid", color = "black", linewidth = 0.25) +
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black") +
  geom_errorbar(aes(ymin = ll, ymax = ul), width = 0, linewidth = 0.3,
                position=position_dodge(width=0.4)) +
  scale_shape_manual(breaks = c("Women", "Men"), values = c(16, 17), labels = legend_labs) +
  scale_color_manual(breaks = c("Women", "Men"), values = c("#B22222", "#4682B4"), labels = legend_labs) +
  scale_fill_manual(breaks = c("Women", "Men"), values = c("#B22222", "#4682B4"), labels = legend_labs) +
  # manually add the avg estimates
  geom_errorbar(data = dt_agg[sex == "Women"],
                aes(x = event_time + 0.1, ymin = ll, ymax = ul), width = 0.15, linewidth = 0.3,
                position=position_dodge(width=0.4), color = "#B22222") +
  geom_point(data = dt_agg[sex == "Women"],
             aes(x = event_time + 0.1, y = b), color = "#B22222", shape = 21, size = 2,
             fill = "white", position=position_dodge(width=0.4)) + 
  geom_errorbar(data = dt_agg[sex == "Men"],
                aes(x = event_time - 0.1, ymin = ll, ymax = ul), width = 0.15, linewidth = 0.3,
                position=position_dodge(width=0.4), color = "#4682B4") +
  geom_point(data = dt_agg[sex == "Men"],
             aes(x = event_time - 0.1, y = b), color = "#4682B4", shape = 24, size = 2,
             fill = "white", position=position_dodge(width=0.4)) + 
  scale_y_continuous(breaks = round(seq(-0.4, 0.2, 0.2), 1)) +
  scale_x_continuous(breaks = c(seq(-5, 10, 1), 11.5), expand = c(0.01, 0.01),
                     labels = c("-5", "-4", "-3", "-2", "-1", "0",
                                "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "Avg, 1-10"),
                     limits = c(-5.1, 12.1)) +
  labs(x = "Time since first birth (years)",
       y = expression("Profits, relative to"~italic(t) == -2~"(pct. change)"),
       color = NULL, fill = NULL, shape = NULL) +
  theme_bw(base_size = 12) +
  theme(text = element_text(family = "Helvetica"),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        legend.position = "inside",
        legend.position.inside = c(0.01, 0.01),
        legend.justification = c(0, 0),
        legend.text = element_text(size = 8.3),
        legend.box.margin = margin(5, 5, 5, 5),
        legend.margin = margin(0, 0, 5, 0))
ggsave("exhibits/profits_es_pct.pdf", width = 180, height = 100, units = "mm")

# ===========================================================================
# Figure A3: entrepreneurs vs wage earners (wages_vs_founder_salaries.pdf)
# ===========================================================================

dt <- fread("estimates/admin/wage_penalty/wage_penalty_male0.csv")
dt[, event_time := as.integer(gsub("et_", "", X)) - 6]
dt <- rbind(dt, data.table(event_time = -2, b = 0), fill = T)
dt[, sex := "Non-entrepreneurs"]

dt_agg <- fread("estimates/admin/wage_penalty/wage_penalty_male0_agg.csv")
dt_agg[, sex := "Non-entrepreneurs"]
dt_agg[, event_time := 11.5]

dt2 <- fread("estimates/admin/regression/lnr_lfirm_salary_trim_ATT_male0.csv")
dt2[, sex := "Entrepreneurs"]
dt2_agg <- dt2[aggregation == "est_pct_agg" & X == "pct_agg_post"]
dt2_agg[, event_time := 11.5]
dt2 <- dt2[aggregation == "est_pct"]
dt2[, event_time := event_time - 6]

dt <- rbind(dt, dt2, fill = T)
dt_agg <- rbind(dt_agg, dt2_agg, fill = T)

ggplot(dt, aes(x = event_time, y = b, color = sex, fill = sex, shape = sex)) +
  geom_point(size = 2, position=position_dodge(width=0.4)) +
  geom_vline(aes(xintercept = 0), linetype = "dotted", color = "red") +
  geom_vline(aes(xintercept = 10.75), linetype = "solid", color = "black", linewidth = 0.25) +
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black") +
  geom_errorbar(aes(ymin = ll, ymax = ul), width = 0, linewidth = 0.3,
                position=position_dodge(width=0.4)) +
  scale_shape_manual(breaks = c("Entrepreneurs", "Non-entrepreneurs"), values = c(16, 15)) + #, labels = legend_labs) +
  scale_color_manual(breaks = c("Entrepreneurs", "Non-entrepreneurs"), values = c("#B22222", "#FF8C00")) + #, labels = legend_labs) +
  scale_fill_manual(breaks = c("Entrepreneurs", "Non-entrepreneurs"), values = c("#B22222", "#FF8C00")) + #, labels = legend_labs) +
  # manually add the avg estimates
  geom_errorbar(data = dt_agg[sex == "Entrepreneurs"],
                aes(x = event_time + 0.1, ymin = ll, ymax = ul), width = 0.15, linewidth = 0.3,
                position=position_dodge(width=0.4), color = "#B22222") +
  geom_point(data = dt_agg[sex == "Entrepreneurs"],
             aes(x = event_time + 0.1, y = b), color = "#B22222", shape = 21, size = 2,
             fill = "white", position=position_dodge(width=0.4)) + 
  geom_errorbar(data = dt_agg[sex == "Non-entrepreneurs"],
                aes(x = event_time - 0.1, ymin = ll, ymax = ul), width = 0.15, linewidth = 0.3,
                position=position_dodge(width=0.4), color = "#FF8C00") +
  geom_point(data = dt_agg[sex == "Non-entrepreneurs"],
             aes(x = event_time - 0.1, y = b), color = "#FF8C00", shape = 22, size = 2,
             fill = "white", position=position_dodge(width=0.4)) + 
  scale_y_continuous(breaks = round(seq(-0.4, 0.2, 0.2), 1)) +
  scale_x_continuous(breaks = c(seq(-5, 10, 1), 11.5), expand = c(0.01, 0.01),
                     labels = c("-5", "-4", "-3", "-2", "-1", "0",
                                "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "Avg, 1-10"),
                     limits = c(-5.1, 12.1)) +
  labs(x = "Time since first birth (years)",
       y = expression("Wages, relative to"~italic(t) == -2~"(pct. change)"),
       color = NULL, fill = NULL, shape = NULL) +
  theme_bw(base_size = 12) +
  theme(text = element_text(family = "Helvetica"),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        legend.position = "inside",
        legend.position.inside = c(0.01, 0.01),
        legend.justification = c(0, 0),
        legend.text = element_text(size = 8.3),
        legend.box.margin = margin(5, 5, 5, 5),
        legend.margin = margin(0, 0, 5, 0))
ggsave("exhibits/wages_vs_founder_salaries.pdf", width = 180, height = 100, units = "mm")

# ===========================================================================
# Table 2: other outcome responses (firm_responses.tex)
# ===========================================================================

# sold firm (hazard), profits, firm active, log(revenues), log(costs), employment
dt <- data.table()

# sold firm (hazard)
dt_tmp <- fread("estimates/admin/regression/sold_firm_hazard_ATT_male0.csv")
dt_tmp[, outcome := "Sold firm (hazard)"]
dt_tmp[, sex := "Female"]
dt <- rbind(dt, dt_tmp)

dt_tmp <- fread("estimates/admin/regression/sold_firm_hazard_ATT_male1.csv")
dt_tmp[, outcome := "Sold firm (hazard)"]
dt_tmp[, sex := "Male"]
dt <- rbind(dt, dt_tmp)

# profits (level)
dt_tmp <- fread("estimates/admin/regression/profit_before_self_trim_ATT_male0.csv")
dt_tmp[, outcome := "Profits"]
dt_tmp[, sex := "Female"]
dt <- rbind(dt, dt_tmp)

dt_tmp <- fread("estimates/admin/regression/profit_before_self_trim_ATT_male1.csv")
dt_tmp[, outcome := "Profits"]
dt_tmp[, sex := "Male"]
dt <- rbind(dt, dt_tmp)

# total value added
dt_tmp <- fread("estimates/admin/regression/value_added_trim_ATT_male0.csv")
dt_tmp[, outcome := "Value added"]
dt_tmp[, sex := "Female"]
dt <- rbind(dt, dt_tmp)

dt_tmp <- fread("estimates/admin/regression/value_added_trim_ATT_male1.csv")
dt_tmp[, outcome := "Value added"]
dt_tmp[, sex := "Male"]
dt <- rbind(dt, dt_tmp)

# firm active
dt_tmp <- fread("estimates/admin/regression/pos_rev_ATT_male0.csv")
dt_tmp[, outcome := "Any revenues"]
dt_tmp[, sex := "Female"]
dt <- rbind(dt, dt_tmp)

dt_tmp <- fread("estimates/admin/regression/pos_rev_ATT_male1.csv")
dt_tmp[, outcome := "Any revenues"]
dt_tmp[, sex := "Male"]
dt <- rbind(dt, dt_tmp)

# log(revenues)
dt_tmp <- fread("estimates/admin/regression/log_revenues_ATT_male0.csv")
dt_tmp[, outcome := "log(revenues)"]
dt_tmp[, sex := "Female"]
dt <- rbind(dt, dt_tmp, fill = T)

dt_tmp <- fread("estimates/admin/regression/log_revenues_ATT_male1.csv")
dt_tmp[, outcome := "log(revenues)"]
dt_tmp[, sex := "Male"]
dt <- rbind(dt, dt_tmp, fill = T)

# log(operating costs)
dt_tmp <- fread("estimates/admin/regression/log_costs_ATT_male0.csv")
dt_tmp[, outcome := "log(operating costs)"]
dt_tmp[, sex := "Female"]
dt <- rbind(dt, dt_tmp, fill = T)

dt_tmp <- fread("estimates/admin/regression/log_costs_ATT_male1.csv")
dt_tmp[, outcome := "log(operating costs)"]
dt_tmp[, sex := "Male"]
dt <- rbind(dt, dt_tmp, fill = T)

# Num workers
dt_tmp <- fread("estimates/admin/regression/n_emp_excl_owners_trim_ATT_male0.csv")
dt_tmp[, outcome := "Employees"]
dt_tmp[, sex := "Female"]
dt <- rbind(dt, dt_tmp, fill = T)

dt_tmp <- fread("estimates/admin/regression/n_emp_excl_owners_trim_ATT_male1.csv")
dt_tmp[, outcome := "Employees"]
dt_tmp[, sex := "Male"]
dt <- rbind(dt, dt_tmp, fill = T)

dt_tmp <- fread("estimates/admin/regression/owner_salaries_trim_ATT_male0.csv")
dt_tmp[, outcome := "Owner salaries"]
dt_tmp[, sex := "Female"]
dt <- rbind(dt, dt_tmp, fill = T)

dt_tmp <- fread("estimates/admin/regression/owner_salaries_trim_ATT_male1.csv")
dt_tmp[, outcome := "Owner salaries"]
dt_tmp[, sex := "Male"]
dt <- rbind(dt, dt_tmp, fill = T)
dt <- dt[aggregation == "summary" | (aggregation == "est_es" & X %in% c("m1", "p0")) | X == "agg_post"]

# get the means in t = -1 for everything except "sold firm"
dt_means <- fread("estimates/admin/regression/pretreat_means.csv")

dt_means <- melt(dt_means, id.vars = "male")
dt_means <- merge(dt_means, data.table(variable = c("profit_before_self_trim", 
                                                    "owner_salaries_trim", 
                                                    "value_added_trim",
                                                    "log_revenues", 
                                                    "log_costs", 
                                                    "n_emp_excl_owners_trim", 
                                                    "pos_rev"),
                                       outcome = c("Profits",
                                                   "Owner salaries", 
                                                   "Value added",
                                                   "log(revenues)",
                                                   "log(operating costs)",
                                                   "Employees",
                                                   "Any revenues")))
dt_means[, sex := ifelse(male == 1, "Male", "Female")]
dt_means <- dt_means[, .(outcome, pre_mean = value, sex)]

# sort columns and merge on the means
dt <- merge(dt, data.table(outcome = c("Profits", 
                                       "Owner salaries", 
                                       "Value added",
                                       "log(revenues)", 
                                       "log(operating costs)", 
                                       "Employees", 
                                       "Any revenues", 
                                       "Sold firm (hazard)"),
                           col_ord = 1:8),
            by = "outcome")
dt <- merge(dt, dt_means, by = c("outcome", "sex"), all.x = T)
dt[outcome != "Sold firm (hazard)" & X == "dep_mean", b := pre_mean]
setkey(dt, col_ord)

# first, columns for women
dt[is.na(pvalue), pvalue := 1]

col_dec <- c(0, 0, 0, 3, 3, 3, 3, 3)

tt_women <- 
  TexRow("") / TexRow(1:8, dec = 0, surround = "(%s)", space = 3) +
  TexRow(c("", "", "Owner", "Value", "Log", "Log", "", "Firm", "Sold")) +
  TexRow(c("", "Profits", "salaries", "added", "revenues", "costs", "Employees", "active", "firm")) +
  TexMidrule(list(c(2, 9))) +
  TexRow("Women, $t = -1$") / TexRow(dt[X == "m1" & sex == "Female"]$b,
                                     dec = col_dec,
                                     pvalues = dt[X == "m1" & sex == "Female"]$pvalue) +
  TexRow("") / TexRow(dt[X == "m1" & sex == "Female"]$se, dec = col_dec, se = T,
                      space = 3)  +
  TexRow("Women, $t = 0$") / TexRow(dt[X == "p0" & sex == "Female"]$b,
                                     dec = col_dec,
                                     pvalues = dt[X == "p0" & sex == "Female"]$pvalue) +
  TexRow("") / TexRow(dt[X == "p0" & sex == "Female"]$se, dec = col_dec, se = T,
                      space = 3) +
  TexRow("Women, $t > 0$ avg.") / TexRow(dt[X == "agg_post" & sex == "Female"]$b,
                                    dec = col_dec,
                                    pvalues = dt[X == "agg_post" & sex == "Female"]$pvalue) +
  TexRow("") / TexRow(dt[X == "agg_post" & sex == "Female"]$se, dec = col_dec, se = T) +
  TexMidrule(list(c(2, 9))) +
  TexRow("$\\bar{Y}$, female owners") /
  TexRow(dt[X == "dep_mean" & sex == "Female"]$b, dec = col_dec) +
  TexRow("$N$ female owners") /
  TexRow(dt[X == "n_owners" & sex == "Female"]$b, dec = 0)

tt_men <-
  TexRow("Men, $t = -1$") / TexRow(dt[X == "m1" & sex == "Male"]$b,
                                     dec = col_dec,
                                     pvalues = dt[X == "m1" & sex == "Male"]$pvalue) +
  TexRow("") / TexRow(dt[X == "m1" & sex == "Male"]$se, dec = col_dec, se = T,
                      space = 3)  +
  TexRow("Men, $t = 0$") / TexRow(dt[X == "p0" & sex == "Male"]$b,
                                    dec = col_dec,
                                    pvalues = dt[X == "p0" & sex == "Male"]$pvalue) +
  TexRow("") / TexRow(dt[X == "p0" & sex == "Male"]$se, dec = col_dec, se = T,
                      space = 3) +
  TexRow("Men, $t > 0$ avg.") / TexRow(dt[X == "agg_post" & sex == "Male"]$b,
                                         dec = col_dec,
                                         pvalues = dt[X == "agg_post" & sex == "Male"]$pvalue) +
  TexRow("") / TexRow(dt[X == "agg_post" & sex == "Male"]$se, dec = col_dec, se = T) +
  TexMidrule(list(c(2, 9))) +
  TexRow("$\\bar{Y}$, male owners") /
  TexRow(dt[X == "dep_mean" & sex == "Male"]$b, dec = col_dec) +
  TexRow("$N$ male owners") /
  TexRow(dt[X == "n_owners" & sex == "Male"]$b, dec = 0)

tt <- tt_women + TexMidrule() + tt_men

# add double dashes instead of single for negatives
for (m in c(5, 7, 9, 16, 18, 20)) {
  for (n in 2:9) {
    tt$row_list[[m]][n] <- gsub("-", "--", tt$row_list[[m]][n])
  }
}

# replace the t = -1 "sold firm" with ---
tt$row_list[[5]][9] <- "---"
tt$row_list[[6]][9] <- ""
tt$row_list[[15]][9] <- "---"
tt$row_list[[16]][9] <- ""

TexSave(tab = tt, positions = c("r", rep("c", 8)), 
        filename = "firm_responses", output_path = "./exhibits")

# ===========================================================================
# Figure 2: profit responses by firm FE (profit_by_fe.pdf)
# ===========================================================================

dt_dec <- data.table()
for (d in 1:4) {
  dt_temp <- fread(paste0("estimates/admin/fe_quartiles/profit_before_self_trim_quartile", d, "_male0_ATT.csv"))
  dt_temp[, decile := d]
  dt_temp[, sex := "Women"]
  dt_dec <- rbind(dt_dec, dt_temp)
  
  dt_temp <- fread(paste0("estimates/admin/fe_quartiles/profit_before_self_trim_quartile", d, "_male1_ATT.csv"))
  dt_temp[, decile := d]
  dt_temp[, sex := "Men"]
  dt_dec <- rbind(dt_dec, dt_temp)
}

dt_dec_agg <- dt_dec[aggregation == "est_pct_agg" & X == "pct_agg_post"]
dt_dec_agg$event_time <- NULL
dt_dec_agg[, event_time := 11.5]

dt_dec <- dt_dec[aggregation == "est_pct"]
dt_dec[, X := gsub("pct_", "", X)]
dt_dec[, event_time := ifelse(grepl("m", X), -1, 1) * as.integer(gsub("m|p", "", X))]

# prep sorted data for later plots
dt_dec_plt <- rbindlist(list(dt_dec[event_time == 0],
                             dt_dec_agg),
                        use.names = T)
dt_dec_plt[, event_time_lab := ifelse(event_time == 0, 
                                      "'Birth year ('*italic(t) == 0*')'",
                                      "'Post-period average ('*italic(t) %in% '[1, 10])'")]

ggplot(dt_dec_plt,
       aes(x = decile, y = b, color = sex, shape = sex)) +
  facet_wrap(~event_time_lab, ncol = 1,
             labeller = label_parsed) +
  geom_point(position = position_dodge(0.5)) +
  geom_errorbar(aes(ymin = ll, ymax = ul), width = 0, linewidth = 0.5,
                position = position_dodge(0.5)) + 
  geom_hline(aes(yintercept = 0), color = "red") +
  scale_y_continuous(breaks = seq(-0.5, 0.5, 0.25)) +
  scale_color_manual(breaks = c("Women", "Men"), values = c("#B22222", "#4682B4")) +
  scale_shape_manual(breaks = c("Women", "Men"), values = c(16, 17)) +
  scale_x_continuous(breaks = seq(1, 4, 1),
                     labels = c("10-30", "30-50", "50-70", "70-90")) +
  labs(x = "Firm fixed effect (percentile)",
       y = "Profit response (pct. change)", color = NULL, shape = NULL) +
  coord_flip() +
  theme_bw(base_size = 12) +
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        legend.position = "inside",
        legend.position.inside = c(0.99, 0.99),
        legend.justification = c(1, 1))
ggsave("exhibits/profit_by_fe.pdf",
       width = 150, height = 130, units = "mm")

# ===========================================================================
# Table 3: heterogeneity, female (heterogeneity_profits.tex)
# ===========================================================================

dt <- data.table()

# ownership
for (x in 0:1) {
  dt_tmp <- fread(sprintf("estimates/admin/heterogeneity/ownership/profit_before_self_trim_ATT_male0_owner%s.csv", x))
  dt_tmp[, group := ifelse(x == 0, "Has co-owners", "Majority owner")]
  dt_tmp[, col_ord := ifelse(x == 1, 1, 2)]
  dt <- rbind(dt, dt_tmp)
}

# any employees
for (x in 0:1) {
  dt_tmp <- fread(sprintf("estimates/admin/heterogeneity/any_emp/profit_before_self_trim_ATT_male0_anyemp%s.csv", x))
  dt_tmp[, group := ifelse(x == 0, "No employees", "Employees")]
  dt_tmp[, col_ord := ifelse(x == 1, 3, 4)]
  dt <- rbind(dt, dt_tmp)
}

# face-to-face contact
for (x in 0:1) {
  dt_tmp <- fread(sprintf("estimates/admin/heterogeneity/face_contact/profit_before_self_trim_ATT_male0_dailyface%s.csv", x))
  dt_tmp[, group := ifelse(x == 0, "Below average", "Above average")]
  dt_tmp[, col_ord := ifelse(x == 1, 5, 6)]
  dt <- rbind(dt, dt_tmp)
}

# partner employed
for (x in 0:1) {
  dt_tmp <- fread(sprintf("estimates/admin/heterogeneity/partner_emp/profit_before_self_trim_ATT_male0_partneremp%s.csv", x))
  dt_tmp[, group := ifelse(x == 1, "Spouse employed", "Spouse not employed")]
  dt_tmp[, col_ord := ifelse(x == 1, 7, 8)]
  dt <- rbind(dt, dt_tmp)
}

# live near parents
for (x in 0:1) {
  dt_tmp <- fread(sprintf("estimates/admin/heterogeneity/near_parents/profit_before_self_trim_ATT_male0_nearparents%s.csv", x))
  dt_tmp[, group := ifelse(x == 0, "Not near parents", "Near parents")]
  dt_tmp[, col_ord := ifelse(x == 1, 9, 10)]
  dt <- rbind(dt, dt_tmp)
}

dt <- dt[aggregation == "summary" | (aggregation == "est_pct" & X %in% c("pct_m1", "pct_p0")) | X == "pct_agg_post"]

# get the means
dt_means1 <- fread("estimates/admin/heterogeneity/ownership/pretreat_means_majowner_male0.csv")
dt_means1[, group := ifelse(maj_owner == 0, "Has co-owners", "Majority owner")]

dt_means2 <- fread("estimates/admin/heterogeneity/partner_emp/pretreat_means_partneremp_male0.csv")
dt_means2[, group := ifelse(partner_employed == 1, "Spouse employed", "Spouse not employed")]

dt_means3 <- fread("estimates/admin/heterogeneity/face_contact/pretreat_means_onet_male0.csv")
dt_means3[, group := ifelse(daily_ind == 0, "Below average", "Above average")]

dt_means4 <- fread("estimates/admin/heterogeneity/any_emp/pretreat_means_anyemp_male0.csv")
dt_means4[, group := ifelse(any_emp == 0, "No employees", "Employees")]

dt_means5 <- fread("estimates/admin/heterogeneity/near_parents/pretreat_means_nearparents_male0.csv")
dt_means5[, group := ifelse(live_near_parents == 0, "Not near parents", "Near parents")]

dt_means <- rbindlist(list(dt_means1[, .(group, profit_before_self_trim)], 
                           dt_means2[, .(group, profit_before_self_trim)], 
                           dt_means3[, .(group, profit_before_self_trim)], 
                           dt_means4[, .(group, profit_before_self_trim)], 
                           dt_means5[, .(group, profit_before_self_trim)]))

dt <- merge(dt, dt_means, by = "group")
dt[X == "dep_mean", b := profit_before_self_trim]

# make the table
setkey(dt, col_ord)
tt_het1 <- 
  TexRow(c("", "$>50\\%$ ownership", "Any employees", "Face-to-face contact"),
         cspan = c(1, 2, 2, 2)) +
  TexMidrule(list(c(2, 3), c(4, 5), c(6, 7))) +
  TexRow(c("\\emph{Profits, pct. change}", rep(c("Yes", "No"), 2), "Higher", "Lower")) +
  TexMidrule() +
  TexRow("$t = -1$") / TexRow(dt[X == "pct_m1" & col_ord <= 6]$b, dec = 3,
                              pvalues = dt[X == "pct_m1" & col_ord <= 6]$pvalue) +
  TexRow("") / TexRow(dt[X == "pct_m1" & col_ord <= 6]$se, dec = 3, se = T, space = 3)  +
  TexRow("$t = 0$") / TexRow(dt[X == "pct_p0" & col_ord <= 6]$b, dec = 3,
                                    pvalues = dt[X == "pct_p0" & col_ord <= 6]$pvalue) +
  TexRow("") / TexRow(dt[X == "pct_p0" & col_ord <= 6]$se, dec = 3, se = T,
                      space = 3) +
  TexRow("$t > 0$ avg.") / TexRow(dt[X == "pct_agg_post" & col_ord <= 6]$b, dec = 3,
                                         pvalues = dt[X == "pct_agg_post" & col_ord <= 6]$pvalue) +
  TexRow("") / TexRow(dt[X == "pct_agg_post" & col_ord <= 6]$se, dec = 3, se = T) +
  TexMidrule(list(c(2, 7))) +
  TexRow("Mean profits") / TexRow(dt[X == "dep_mean" & col_ord <= 6]$b, dec = 0, dollar = T) +
  TexRow("$N$ owners") / TexRow(dt[X == "n_owners" & col_ord <= 6]$b, dec = 0)

tt_het2 <- 
  TexRow(c("", "Spouse employed", "Parents nearby", ""),
         cspan = c(1, 2, 2, 2)) +
  TexMidrule(list(c(2, 3), c(4, 5))) +
  TexRow(c("\\emph{Profits, pct. change}", rep(c("Yes", "No"), 2), "", "")) +
  TexMidrule(list(c(1, 5))) +
  TexRow("$t = -1$") / TexRow(dt[X == "pct_m1" & col_ord > 6]$b, dec = 3,
                              pvalues = dt[X == "pct_m1" & col_ord > 6]$pvalue) +
  TexRow("") / TexRow(dt[X == "pct_m1" & col_ord > 6]$se, dec = 3, se = T, space = 3)  +
  TexRow("$t = 0$") / TexRow(dt[X == "pct_p0" & col_ord > 6]$b, dec = 3,
                             pvalues = dt[X == "pct_p0" & col_ord > 6]$pvalue) +
  TexRow("") / TexRow(dt[X == "pct_p0" & col_ord > 6]$se, dec = 3, se = T,
                      space = 3) +
  TexRow("$t > 0$ avg.") / TexRow(dt[X == "pct_agg_post" & col_ord > 6]$b, dec = 3,
                                  pvalues = dt[X == "pct_agg_post" & col_ord > 6]$pvalue) +
  TexRow("") / TexRow(dt[X == "pct_agg_post" & col_ord > 6]$se, dec = 3, se = T) +
  TexMidrule(list(c(2, 5))) +
  TexRow("Mean profits") / TexRow(dt[X == "dep_mean" & col_ord > 6]$b, dec = 0, dollar = T) +
  TexRow("$N$ owners") / TexRow(dt[X == "n_owners" & col_ord > 6]$b, dec = 0)

tt_het <- tt_het1 + TexMidrule() + tt_het2

TexSave(tab = tt_het, positions = c("r", rep("c", 6)),
        filename = "heterogeneity_profits", output_path = "./exhibits")

# ===========================================================================
# Table A5: heterogeneity, male (heterogeneity_profits_male.tex)
# ===========================================================================

dt <- data.table()

# ownership
for (x in 0:1) {
  dt_tmp <- fread(sprintf("estimates/admin/heterogeneity/male/ownership/profit_before_self_trim_ATT_male1_owner%s.csv", x))
  dt_tmp[, group := ifelse(x == 0, "Has co-owners", "Majority owner")]
  dt_tmp[, col_ord := ifelse(x == 1, 1, 2)]
  dt <- rbind(dt, dt_tmp)
}

# any employees
for (x in 0:1) {
  dt_tmp <- fread(sprintf("estimates/admin/heterogeneity/male/any_emp/profit_before_self_trim_ATT_male1_anyemp%s.csv", x))
  dt_tmp[, group := ifelse(x == 0, "No employees", "Employees")]
  dt_tmp[, col_ord := ifelse(x == 1, 3, 4)]
  dt <- rbind(dt, dt_tmp)
}

# face-to-face contact
for (x in 0:1) {
  dt_tmp <- fread(sprintf("estimates/admin/heterogeneity/male/face_contact/profit_before_self_trim_ATT_male1_dailyface%s.csv", x))
  dt_tmp[, group := ifelse(x == 0, "Below average", "Above average")]
  dt_tmp[, col_ord := ifelse(x == 1, 5, 6)]
  dt <- rbind(dt, dt_tmp)
}

# partner employed
for (x in 0:1) {
  dt_tmp <- fread(sprintf("estimates/admin/heterogeneity/male/partner_emp/profit_before_self_trim_ATT_male1_partneremp%s.csv", x))
  dt_tmp[, group := ifelse(x == 1, "Spouse employed", "Spouse not employed")]
  dt_tmp[, col_ord := ifelse(x == 1, 7, 8)]
  dt <- rbind(dt, dt_tmp)
}

# live near parents
for (x in 0:1) {
  dt_tmp <- fread(sprintf("estimates/admin/heterogeneity/male/near_parents/profit_before_self_trim_ATT_male1_nearparents%s.csv", x))
  dt_tmp[, group := ifelse(x == 0, "Not near parents", "Near parents")]
  dt_tmp[, col_ord := ifelse(x == 1, 9, 10)]
  dt <- rbind(dt, dt_tmp)
}

dt <- dt[aggregation == "summary" | (aggregation == "est_pct" & X %in% c("pct_m1", "pct_p0")) | X == "pct_agg_post"]

# get the means
dt_means1 <- fread("estimates/admin/heterogeneity/male/ownership/pretreat_means_majowner_male1.csv")
dt_means1[, group := ifelse(maj_owner == 0, "Has co-owners", "Majority owner")]

dt_means2 <- fread("estimates/admin/heterogeneity/male/partner_emp/pretreat_means_partneremp_male1.csv")
dt_means2[, group := ifelse(partner_employed == 1, "Spouse employed", "Spouse not employed")]

dt_means3 <- fread("estimates/admin/heterogeneity/male/face_contact/pretreat_means_onet_male1.csv")
dt_means3[, group := ifelse(daily_ind == 0, "Below average", "Above average")]

dt_means4 <- fread("estimates/admin/heterogeneity/male/any_emp/pretreat_means_anyemp_male1.csv")
dt_means4[, group := ifelse(any_emp == 0, "No employees", "Employees")]

dt_means5 <- fread("estimates/admin/heterogeneity/male/near_parents/pretreat_means_nearparents_male1.csv")
dt_means5[, group := ifelse(live_near_parents == 0, "Not near parents", "Near parents")]

dt_means <- rbindlist(list(dt_means1[, .(group, profit_before_self_trim)], 
                           dt_means2[, .(group, profit_before_self_trim)], 
                           dt_means3[, .(group, profit_before_self_trim)], 
                           dt_means4[, .(group, profit_before_self_trim)], 
                           dt_means5[, .(group, profit_before_self_trim)]))

dt <- merge(dt, dt_means, by = "group")
dt[X == "dep_mean", b := profit_before_self_trim]

# make the table
setkey(dt, col_ord)
tt_het1 <- 
  TexRow(c("", "$>50\\%$ ownership", "Any employees", "Face-to-face contact"),
         cspan = c(1, 2, 2, 2)) +
  TexMidrule(list(c(2, 3), c(4, 5), c(6, 7))) +
  TexRow(c("\\emph{Profits, pct. change}", rep(c("Yes", "No"), 2), "Higher", "Lower")) +
  TexMidrule() +
  TexRow("$t = -1$") / TexRow(dt[X == "pct_m1" & col_ord <= 6]$b, dec = 3,
                              pvalues = dt[X == "pct_m1" & col_ord <= 6]$pvalue) +
  TexRow("") / TexRow(dt[X == "pct_m1" & col_ord <= 6]$se, dec = 3, se = T, space = 3)  +
  TexRow("$t = 0$") / TexRow(dt[X == "pct_p0" & col_ord <= 6]$b, dec = 3,
                             pvalues = dt[X == "pct_p0" & col_ord <= 6]$pvalue) +
  TexRow("") / TexRow(dt[X == "pct_p0" & col_ord <= 6]$se, dec = 3, se = T,
                      space = 3) +
  TexRow("$t > 0$ avg.") / TexRow(dt[X == "pct_agg_post" & col_ord <= 6]$b, dec = 3,
                                  pvalues = dt[X == "pct_agg_post" & col_ord <= 6]$pvalue) +
  TexRow("") / TexRow(dt[X == "pct_agg_post" & col_ord <= 6]$se, dec = 3, se = T) +
  TexMidrule(list(c(2, 7))) +
  TexRow("Mean profits") / TexRow(dt[X == "dep_mean" & col_ord <= 6]$b, dec = 0, dollar = T) +
  TexRow("$N$ owners") / TexRow(dt[X == "n_owners" & col_ord <= 6]$b, dec = 0)

tt_het2 <- 
  TexRow(c("", "Spouse employed", "Parents nearby", ""),
         cspan = c(1, 2, 2, 2)) +
  TexMidrule(list(c(2, 3), c(4, 5))) +
  TexRow(c("\\emph{Profits, pct. change}", rep(c("Yes", "No"), 2), "", "")) +
  TexMidrule(list(c(1, 5))) +
  TexRow("$t = -1$") / TexRow(dt[X == "pct_m1" & col_ord > 6]$b, dec = 3,
                              pvalues = dt[X == "pct_m1" & col_ord > 6]$pvalue) +
  TexRow("") / TexRow(dt[X == "pct_m1" & col_ord > 6]$se, dec = 3, se = T, space = 3)  +
  TexRow("$t = 0$") / TexRow(dt[X == "pct_p0" & col_ord > 6]$b, dec = 3,
                             pvalues = dt[X == "pct_p0" & col_ord > 6]$pvalue) +
  TexRow("") / TexRow(dt[X == "pct_p0" & col_ord > 6]$se, dec = 3, se = T,
                      space = 3) +
  TexRow("$t > 0$ avg.") / TexRow(dt[X == "pct_agg_post" & col_ord > 6]$b, dec = 3,
                                  pvalues = dt[X == "pct_agg_post" & col_ord > 6]$pvalue) +
  TexRow("") / TexRow(dt[X == "pct_agg_post" & col_ord > 6]$se, dec = 3, se = T) +
  TexMidrule(list(c(2, 5))) +
  TexRow("Mean profits") / TexRow(dt[X == "dep_mean" & col_ord > 6]$b, dec = 0, dollar = T) +
  TexRow("$N$ owners") / TexRow(dt[X == "n_owners" & col_ord > 6]$b, dec = 0)

tt_het <- tt_het1 + TexMidrule() + tt_het2


TexSave(tab = tt_het, positions = c("r", rep("c", 6)), 
        filename = "heterogeneity_profits_male", output_path = "./exhibits")

# ===========================================================================
# Table A1: robustness to sample criteria (robustness_sample_criteria.tex)
## baseline (original)
## changing ownership restriction (25%)
## changing ownership restriction (50%)
## changing age restriction (3+)
## changing age restriction (5+)
## changing age restriction (7+)
# ===========================================================================

dt <- data.table()

# original outcome, method
dt_tmp <- fread("estimates/admin/regression/profit_before_self_trim_ATT_male0.csv")
dt_tmp[, group := "orig"]
dt <- rbind(dt, dt_tmp)

# 25% ownership restriction
dt_tmp <- fread("estimates/admin/robustness/ownership_share/profit_before_self_trim_25plus_male0.csv")
dt_tmp[, group := "own25"]
dt <- rbind(dt, dt_tmp)

# 50% ownership restriction
dt_tmp <- fread("estimates/admin/robustness/ownership_share/profit_before_self_trim_50plus_male0.csv")
dt_tmp[, group := "own50"]
dt <- rbind(dt, dt_tmp)

# 3+ age restriction (in t-1)
dt_tmp <- fread("estimates/admin/robustness/firm_age/profit_before_self_trim_minage3_male0.csv")
dt_tmp[, group := "age3"]
dt <- rbind(dt, dt_tmp)

# 5+ age restriction (in t-1)
dt_tmp <- fread("estimates/admin/robustness/firm_age/profit_before_self_trim_minage5_male0.csv")
dt_tmp[, group := "age5"]
dt <- rbind(dt, dt_tmp)

# 7+ age restriction (in t-1)
dt_tmp <- fread("estimates/admin/robustness/firm_age/profit_before_self_trim_minage7_male0.csv")
dt_tmp[, group := "age7"]
dt <- rbind(dt, dt_tmp)

dt <- dt[aggregation == "summary" | 
           (aggregation == "est_pct" & X %in% c("pct_m1", "pct_p0")) | 
           (aggregation == "est_es" & X %in% c("m1", "p0")) | 
           X %in% c("agg_post", "pct_agg_post")]

# get the means
dt_means1 <- fread("estimates/admin/regression/pretreat_means.csv")
dt_means1 <- dt_means1[male == 0, .(profit_before_self_trim)]
dt_means1[, group := "orig"]

dt_means2 <- fread("estimates/admin/robustness/ownership_share/pretreat_means_25plus.csv")
dt_means2 <- dt_means2[male == 0]
dt_means2[, group := "own25"]

dt_means3 <- fread("estimates/admin/robustness/ownership_share/pretreat_means_50plus.csv")
dt_means3 <- dt_means3[male == 0]
dt_means3[, group := "own50"]

dt_means4 <- fread("estimates/admin/robustness/firm_age/pretreat_means_minage3.csv")
dt_means4 <- dt_means4[male == 0]
dt_means4[, group := "age3"]

dt_means5 <- fread("estimates/admin/robustness/firm_age/pretreat_means_minage5.csv")
dt_means5 <- dt_means5[male == 0]
dt_means5[, group := "age5"]

dt_means6 <- fread("estimates/admin/robustness/firm_age/pretreat_means_minage7.csv")
dt_means6 <- dt_means6[male == 0]
dt_means6[, group := "age7"]

dt_means <- rbindlist(list(dt_means1[, .(group, profit_before_self_trim)], 
                           dt_means2[, .(group, profit_before_self_trim)], 
                           dt_means3[, .(group, profit_before_self_trim)], 
                           dt_means4[, .(group, profit_before_self_trim)], 
                           dt_means5[, .(group, profit_before_self_trim)], 
                           dt_means6[, .(group, profit_before_self_trim)]))

dt <- merge(dt, dt_means, by = "group")
dt[X == "dep_mean", b := profit_before_self_trim]

# order the columns
dt <- merge(dt, data.table(group = c("orig", "own25", "own50", "age3", "age5", "age7"),
                           col_ord = 1:6), by = "group")
setkey(dt, col_ord)

# make the table
tt_rob <- 
  TexRow("") / TexRow(1:6, dec = 0, surround = "(%s)") +
  TexMidrule(list(c(2, 7))) +
  TexRow(c("", "Response as percentage change"), cspan = c(1, 6)) +
  TexMidrule(list(c(2, 7))) +
  # First panel: percentage change
  TexRow("$t = -1$") / TexRow(dt[X == "pct_m1"]$b, pvalues = dt[X == "pct_m1"]$pvalue, dec = 3) +
  TexRow("") / TexRow(dt[X == "pct_m1"]$se, dec = 3, se = T, space = 3) +
  TexRow("$t = 0$") / TexRow(dt[X == "pct_p0"]$b, pvalues = dt[X == "pct_p0"]$pvalue, dec = 3) +
  TexRow("") / TexRow(dt[X == "pct_p0"]$se, dec = 3, se = T, space = 3) +
  TexRow("$t > 0$ avg.") / TexRow(dt[X == "pct_agg_post"]$b, pvalues = dt[X == "pct_agg_post"]$pvalue, dec = 3) +
  TexRow("") / TexRow(dt[X == "pct_agg_post"]$se, dec = 3, se = T) +
  TexMidrule(list(c(2, 7))) +
  TexRow(c("", "Response in levels"), cspan = c(1, 6)) +
  TexMidrule(list(c(2, 7))) +
  # Second panel: levels
  TexRow("$t = -1$") / TexRow(dt[X == "m1"]$b, pvalues = dt[X == "m1"]$pvalue, dec = 0) +
  TexRow("") / TexRow(dt[X == "m1"]$se, dec = 0, se = T, space = 3) +
  TexRow("$t = 0$") / TexRow(dt[X == "p0"]$b, pvalues = dt[X == "p0"]$pvalue, dec = 0) +
  TexRow("") / TexRow(dt[X == "p0"]$se, dec = 0, se = T, space = 3) +
  TexRow("$t > 0$ avg.") / TexRow(dt[X == "agg_post"]$b, pvalues = dt[X == "agg_post"]$pvalue, dec = 0) +
  TexRow("") / TexRow(dt[X == "agg_post"]$se, dec = 0, se = T) +
  TexMidrule() +
  TexRow(c("\\multicolumn{7}{l}{\\emph{Sample selection minimums at $t=-1$}}")) +
  TexRow(c("Ownership share", "1/3", "1/4", "1/2", rep("1/3", 3)), space = 3) +
  TexRow(c("Firm age", rep("2", 3), "3", "5", "7")) +
  TexMidrule() +
  TexRow("$\\bar{Y}$") / 
  TexRow(dt[X == "dep_mean"]$b, dec = 0) +
  TexRow("$N$ owners") / 
  TexRow(dt[X == "n_owners"]$b, dec = 0) +
  TexRow("$N$ firms") / 
  TexRow(dt[X == "n_firms"]$b, dec = 0)

TexSave(tab = tt_rob, positions = c("r", rep("c", 6)), 
        filename = "robustness_sample_criteria", output_path = "./exhibits")

# ===========================================================================
# Table A2: additional robustness (robustness.tex)
## baseline (original, profits)
## alternative weights * 2
## operating profits
## Two panels, one for percents and one for levels
# ===========================================================================

dt <- data.table()

# original outcome, method
dt_tmp <- fread("estimates/admin/regression/profit_before_self_trim_ATT_male0.csv")
dt_tmp[, group := "orig"]
dt <- rbind(dt, dt_tmp)

# alternative weights (2-digit industry)
dt_tmp <- fread("estimates/admin/robustness/profit_before_self_trim_2digit_male0.csv")
dt_tmp[, group := "2digit"]
dt <- rbind(dt, dt_tmp)

# alternative weights (industry * education)
dt_tmp <- fread("estimates/admin/robustness/profit_before_self_trim_educ_male0.csv")
dt_tmp[, group := "educ"]
dt <- rbind(dt, dt_tmp)

# using past parents as controls
dt_tmp <- fread("estimates/admin/robustness/profit_before_self_trim_parentcontrol_male0.csv")
dt_tmp[, group := "parent"]
dt <- rbind(dt, dt_tmp)

# using past parents (+10 years since last child) as controls
dt_tmp <- fread("estimates/admin/robustness/profit_before_self_trim_m10control_male0.csv")
dt_tmp[, group := "m10"]
dt <- rbind(dt, dt_tmp)

# operating profits outcome
dt_tmp <- fread("estimates/admin/regression/operating_profits_trim_ATT_male0.csv")
dt_tmp[, group := "operating"]
dt <- rbind(dt, dt_tmp)

dt <- dt[aggregation == "summary" | 
           (aggregation == "est_pct" & X %in% c("pct_m1", "pct_p0")) | 
           (aggregation == "est_es" & X %in% c("m1", "p0")) | 
           X %in% c("agg_post", "pct_agg_post")]

# get the means
dt_means1 <- fread("estimates/admin/regression/pretreat_means.csv")
dt_means1 <- melt(dt_means1[male == 0, .(profit_before_self_trim, 
                                         operating_profits_trim)],
                  measure.vars = c("profit_before_self_trim", 
                                   "operating_profits_trim"),
                  value.name = "profit_before_self_trim")
dt_means1[, group := ifelse(variable == "operating_profits_trim", "operating", "orig")]

dt_means2 <- fread("estimates/admin/robustness/pretreat_means_2digit.csv")
dt_means2 <- dt_means2[male == 0]
dt_means2[, group := "2digit"]

dt_means3 <- fread("estimates/admin/robustness/pretreat_means_educ.csv")
dt_means3 <- dt_means3[male == 0]
dt_means3[, group := "educ"]

dt_means4 <- fread("estimates/admin/robustness/pretreat_means_parentcontrol.csv")
dt_means4 <- dt_means4[male == 0]
dt_means4[, group := "parent"]

dt_means5 <- fread("estimates/admin/robustness/pretreat_means_m10control.csv")
dt_means5 <- dt_means5[male == 0]
dt_means5[, group := "m10"]

dt_means <- rbindlist(list(dt_means1[, .(group, profit_before_self_trim)], 
                           dt_means2[, .(group, profit_before_self_trim)], 
                           dt_means3[, .(group, profit_before_self_trim)], 
                           dt_means4[, .(group, profit_before_self_trim)], 
                           dt_means5[, .(group, profit_before_self_trim)]))

dt <- merge(dt, dt_means, by = "group")
dt[X == "dep_mean", b := profit_before_self_trim]

# order the columns
dt <- merge(dt, data.table(group = c("orig", "parent", "m10", "2digit", "educ", "operating"),
                           col_ord = 1:6), by = "group")
setkey(dt, col_ord)

# make the table
tt_rob <- 
  TexRow("") / TexRow(1:6, dec = 0, surround = "(%s)") +
  TexMidrule(list(c(2, 7))) +
  TexRow(c("", "Response as percentage change"), cspan = c(1, 6)) +
  TexMidrule(list(c(2, 7))) +
  # First panel: percentage change
  TexRow("$t = -1$") / TexRow(dt[X == "pct_m1"]$b, pvalues = dt[X == "pct_m1"]$pvalue, dec = 3) +
  TexRow("") / TexRow(dt[X == "pct_m1"]$se, dec = 3, se = T, space = 3) +
  TexRow("$t = 0$") / TexRow(dt[X == "pct_p0"]$b, pvalues = dt[X == "pct_p0"]$pvalue, dec = 3) +
  TexRow("") / TexRow(dt[X == "pct_p0"]$se, dec = 3, se = T, space = 3) +
  TexRow("$t > 0$ avg.") / TexRow(dt[X == "pct_agg_post"]$b, pvalues = dt[X == "pct_agg_post"]$pvalue, dec = 3) +
  TexRow("") / TexRow(dt[X == "pct_agg_post"]$se, dec = 3, se = T) +
  TexMidrule(list(c(2, 7))) +
  TexRow(c("", "Response in levels"), cspan = c(1, 6)) +
  TexMidrule(list(c(2, 7))) +
  # Second panel: levels
  TexRow("$t = -1$") / TexRow(dt[X == "m1"]$b, pvalues = dt[X == "m1"]$pvalue, dec = 0) +
  TexRow("") / TexRow(dt[X == "m1"]$se, dec = 0, se = T, space = 3) +
  TexRow("$t = 0$") / TexRow(dt[X == "p0"]$b, pvalues = dt[X == "p0"]$pvalue, dec = 0) +
  TexRow("") / TexRow(dt[X == "p0"]$se, dec = 0, se = T, space = 3) +
  TexRow("$t > 0$ avg.") / TexRow(dt[X == "agg_post"]$b, pvalues = dt[X == "agg_post"]$pvalue, dec = 0) +
  TexRow("") / TexRow(dt[X == "agg_post"]$se, dec = 0, se = T) +
  TexMidrule() +
  TexRow(c("Outcome variable", rep("Profits before", 5), "Operating profits")) +
  TexRow(c("", rep("owner salaries", 5), ""), space = 3) +
  TexRow(c("Control group", "Full", "Other parents", "No child born", rep("Full", 3))) +
  TexRow(c("", "", "", "past 10 years", rep("", 3)), space = 3) +
  TexRow(c("Weighting vars.", rep("Firm age", 6))) +
  TexRow(c("", rep("$\\times$ SIC section", 3), "$\\times$ SIC division", "$\\times$ SIC section", "$\\times$ SIC section")) +
  TexRow(c("", "", "", "", "(2-digit industry)", "$\\times$ owner educ.", "")) +
  TexMidrule() +
  TexRow("$\\bar{Y}$") / 
  TexRow(dt[X == "dep_mean"]$b, dec = 0) +
  TexRow("$N$ owners") / 
  TexRow(dt[X == "n_owners"]$b, dec = 0) +
  TexRow("$N$ firms") / 
  TexRow(dt[X == "n_firms"]$b, dec = 0)

TexSave(tab = tt_rob, positions = c("r", rep("c", 6)), 
        filename = "robustness", output_path = "./exhibits")

# ===========================================================================
# Figure 3: working hours (time_use.pdf)
# ===========================================================================
dt <- data.table()
for (g in c("m", "f")) {
  for (avg in c("", "_average")) {
    dt_tmp <- read.dta13(sprintf("estimates/silc/%s_hrs%s.dta", g, avg))
    setDT(dt_tmp)
    dt_tmp <- dt_tmp[grepl("child", variable) & !variable %in% c("0.child", "0.haschild")]
    dt_tmp[, event_time := as.integer(gsub(".child", "", variable, fixed = T))]
    dt_tmp[, sex := ifelse(g == "f", "Women", "Men")]
    dt <- rbind(dt, dt_tmp)
  }
}

setnames(dt, "coef", "b")
dt[, ll := b - 1.96 * se]
dt[, ul := b + 1.96 * se]

# averages of 0-3 (lincom results from analysis.do)
dt_avg <- read.dta13("estimates/silc/lincom_avg_0_3.dta")
setDT(dt_avg)
setnames(dt_avg, "coef", "b")
dt_avg[, ll := b - 1.96 * se]
dt_avg[, ul := b + 1.96 * se]
dt_avg[, event_time := 11.5]

dt_agg <- dt[is.na(event_time)]
dt_agg[, event_time := NULL]
dt_agg[, event_time := 12.5]
dt_agg <- rbind(dt_agg, dt_avg[outcome == "hrs_work"], fill = T)

dt <- dt[!is.na(event_time)]

dt_agg[, lab := sprintf(
  "'%s:'~ widehat(beta)['0-3'] == '%.1f ('* %.1f *')'",
  sex, round(b, 1), round(se, 1)
)]
legend_labs <- lapply(c(dt_agg[event_time == 11.5 & sex == "Women"]$lab, 
                        dt_agg[event_time == 11.5 & sex == "Men"]$lab),
                      function(lbl) parse(text = lbl))

ggplot(dt, aes(x = event_time, y = b, color = sex, fill = sex, shape = sex)) +
  geom_point(size = 2, position=position_dodge(width=0.4)) +
  geom_vline(aes(xintercept = 10.75), linetype = "solid", color = "black", linewidth = 0.25) +
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black") +
  geom_errorbar(aes(ymin = ll, ymax = ul), width = 0, linewidth = 0.3,
                position=position_dodge(width=0.4)) +
  scale_shape_manual(breaks = c("Women", "Men"), values = c(16, 17), labels = legend_labs) +
  scale_color_manual(breaks = c("Women", "Men"), values = c("#B22222", "#4682B4"), labels = legend_labs) +
  scale_fill_manual(breaks = c("Women", "Men"), values = c("#B22222", "#4682B4"), labels = legend_labs) +
  # manually add the avg estimates
  geom_errorbar(data = dt_agg[sex == "Women"],
                aes(x = event_time + 0.1, ymin = ll, ymax = ul), width = 0.15, linewidth = 0.3,
                position=position_dodge(width=0.4), color = "#B22222") +
  geom_point(data = dt_agg[sex == "Women"],
             aes(x = event_time + 0.1, y = b), color = "#B22222", shape = 21, size = 2,
             fill = "white", position=position_dodge(width=0.4)) + 
  geom_errorbar(data = dt_agg[sex == "Men"],
                aes(x = event_time - 0.1, ymin = ll, ymax = ul), width = 0.15, linewidth = 0.3,
                position=position_dodge(width=0.4), color = "#4682B4") +
  geom_point(data = dt_agg[sex == "Men"],
             aes(x = event_time - 0.1, y = b), color = "#4682B4", shape = 24, size = 2,
             fill = "white", position=position_dodge(width=0.4)) + 
  scale_x_continuous(breaks = c(seq(1, 10, 1), 11.5, 12.5), expand = c(0.01, 0.01),
                     labels = c("0-1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "0-3", "0-10"),
                     limits = c(0.5, 13.1)) +
  labs(x = "Age of youngest child (years)",
       y = "Estimate, weekly hours worked",
       color = NULL, fill = NULL, shape = NULL) +
  theme_bw(base_size = 11) +
  theme(text = element_text(family = "Helvetica"),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        legend.position = "bottom",
        legend.text = element_text(size = 8.3),
        legend.box.margin = margin(5, 5, 5, 5),
        legend.margin = margin(0, 0, 5, 0))
ggsave("exhibits/time_use.pdf", width = 130, height = 100, units = "mm")

# ===========================================================================
# Figure A4: health outcomes (good_health.pdf)
# ===========================================================================
dt <- data.table()
for (g in c("m", "f")) {
  for (avg in c("", "_average")) {
    dt_tmp <- read.dta13(sprintf("estimates/silc/%s_goodhealth%s.dta", g, avg))
    setDT(dt_tmp)
    dt_tmp <- dt_tmp[grepl("child", variable) & !variable %in% c("0.child", "0.haschild")]
    dt_tmp[, event_time := as.integer(gsub(".child", "", variable, fixed = T))]
    dt_tmp[, sex := ifelse(g == "f", "Women", "Men")]
    dt <- rbind(dt, dt_tmp)
  }
}

setnames(dt, "coef", "b")
dt[, ll := b - 1.96 * se]
dt[, ul := b + 1.96 * se]

dt_agg <- dt[is.na(event_time)]
dt_agg[, event_time := NULL]
dt_agg[, event_time := 12.5]
dt_agg <- rbind(dt_agg, dt_avg[outcome == "good_health"], fill = T)

dt <- dt[!is.na(event_time)]

dt_agg[, lab := sprintf(
  "'%s:'~ widehat(beta)['0-3'] == '%.2f (%.2f)'",
  sex, round(b, 2), round(se, 2)
)]
legend_labs <- lapply(c(dt_agg[event_time == 11.5 & sex == "Women"]$lab, 
                        dt_agg[event_time == 11.5 & sex == "Men"]$lab),
                      function(lbl) parse(text = lbl))

ggplot(dt, aes(x = event_time, y = b, color = sex, fill = sex, shape = sex)) +
  geom_point(size = 2, position=position_dodge(width=0.4)) +
  geom_vline(aes(xintercept = 10.75), linetype = "solid", color = "black", linewidth = 0.25) +
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black") +
  geom_errorbar(aes(ymin = ll, ymax = ul), width = 0, linewidth = 0.3,
                position=position_dodge(width=0.4)) +
  scale_shape_manual(breaks = c("Women", "Men"), values = c(16, 17), labels = legend_labs) +
  scale_color_manual(breaks = c("Women", "Men"), values = c("#B22222", "#4682B4"), labels = legend_labs) +
  scale_fill_manual(breaks = c("Women", "Men"), values = c("#B22222", "#4682B4"), labels = legend_labs) +
  # manually add the avg estimates
  geom_errorbar(data = dt_agg[sex == "Women"],
                aes(x = event_time + 0.1, ymin = ll, ymax = ul), width = 0.15, linewidth = 0.3,
                position=position_dodge(width=0.4), color = "#B22222") +
  geom_point(data = dt_agg[sex == "Women"],
             aes(x = event_time + 0.1, y = b), color = "#B22222", shape = 21, size = 2,
             fill = "white", position=position_dodge(width=0.4)) + 
  geom_errorbar(data = dt_agg[sex == "Men"],
                aes(x = event_time - 0.1, ymin = ll, ymax = ul), width = 0.15, linewidth = 0.3,
                position=position_dodge(width=0.4), color = "#4682B4") +
  geom_point(data = dt_agg[sex == "Men"],
             aes(x = event_time - 0.1, y = b), color = "#4682B4", shape = 24, size = 2,
             fill = "white", position=position_dodge(width=0.4)) + 
  scale_x_continuous(breaks = c(seq(1, 10, 1), 11.5, 12.5), expand = c(0.01, 0.01),
                     labels = c("0-1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "0-3", "0-10"),
                     limits = c(0.5, 13.1)) +
  labs(x = "Age of youngest child (years)",
       y = "Estimate, good health",
       color = NULL, fill = NULL, shape = NULL) +
  theme_bw(base_size = 11) +
  theme(text = element_text(family = "Helvetica"),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        legend.position = "bottom",
        legend.text = element_text(size = 8.3),
        legend.box.margin = margin(5, 5, 5, 5),
        legend.margin = margin(0, 0, 5, 0))
ggsave("exhibits/good_health.pdf", width = 130, height = 100, units = "mm")

# ===========================================================================
# Table A4: labor outcome responses (labor_responses.tex)
# ===========================================================================

dt <- data.table()

# any wage > 10,000
dt_tmp <- fread("estimates/admin/regression/any_wage_ATT_male0.csv")
dt_tmp[, outcome := "Any wage $\\geq$\\$10K"]
dt_tmp[, sex := "Female"]
dt <- rbind(dt, dt_tmp)

dt_tmp <- fread("estimates/admin/regression/any_wage_ATT_male1.csv")
dt_tmp[, outcome := "Any wage $\\geq$\\$10K"]
dt_tmp[, sex := "Male"]
dt <- rbind(dt, dt_tmp)

# other wages
dt_tmp <- fread("estimates/admin/regression/wage_earnings_trim_ATT_male0.csv")
dt_tmp[, outcome := "Other wages"]
dt_tmp[, sex := "Female"]
dt <- rbind(dt, dt_tmp)

dt_tmp <- fread("estimates/admin/regression/wage_earnings_trim_ATT_male1.csv")
dt_tmp[, outcome := "Other wages"]
dt_tmp[, sex := "Male"]
dt <- rbind(dt, dt_tmp)

# total value added
dt_tmp <- fread("estimates/admin/regression/lnr_lfirm_salary_trim_ATT_male0.csv")
dt_tmp[, outcome := "Self-paid wages"]
dt_tmp[, sex := "Female"]
dt <- rbind(dt, dt_tmp)

dt_tmp <- fread("estimates/admin/regression/lnr_lfirm_salary_trim_ATT_male1.csv")
dt_tmp[, outcome := "Self-paid wages"]
dt_tmp[, sex := "Male"]
dt <- rbind(dt, dt_tmp)
dt <- dt[aggregation == "summary" | (aggregation == "est_es" & X %in% c("m1", "p0")) | X == "agg_post"]

# get the means in t = -1 
dt_means <- fread("estimates/admin/regression/pretreat_means.csv")

dt_means <- melt(dt_means, id.vars = "male")
dt_means <- merge(dt_means, data.table(variable = c("any_wage", 
                                                    "lnr_lfirm_salary_trim", 
                                                    "wage_earnings_trim"),
                                       outcome = c("Any wage $\\geq$\\$10K",
                                                   "Self-paid wages", 
                                                   "Other wages")))
dt_means[, sex := ifelse(male == 1, "Male", "Female")]
dt_means <- dt_means[, .(outcome, pre_mean = value, sex)]

# sort columns and merge on the means
dt <- merge(dt, data.table(outcome = c("Self-paid wages", 
                                       "Other wages", 
                                       "Any wage $\\geq$\\$10K"),
                           col_ord = 1:3),
            by = "outcome")
dt <- merge(dt, dt_means, by = c("outcome", "sex"), all.x = T)
setkey(dt, col_ord)

# format the estimates
dt[, pstars := ifelse(pvalue > 0.1, "\\phantom{***}",
                      ifelse(pvalue > 0.05, "*\\phantom{**}",
                             ifelse(pvalue > 0.01, "**\\phantom{*}", "***")))]
dt[outcome %in% c("Other wages", "Self-paid wages"),
   b_fmt := paste0(ifelse(b < 0, "-", "\\phantom{--}"),
                          trimws(format(round(b), big.mark = ",", nsmall = 0)),
                   " (", trimws(format(round(se), big.mark = ",", nsmall = 0)), ")",
                   pstars)]
dt[outcome == "Any wage $\\geq$\\$10K",
   b_fmt := paste0(ifelse(b < 0, "-", "\\phantom{--}"),
                   trimws(format(round(b, 3), big.mark = ",", nsmall = 3)),
                   " (", trimws(format(round(se, 3), big.mark = ",", nsmall = 3)), ")",
                   pstars)]

# first, columns for women
col_dec <- c(0, 0, 3)

tt_women <- 
  TexRow("") / TexRow(1:3, dec = 0, surround = "(%s)", space = 3) +
  TexRow(c("", "", "", "Any other")) +
  TexRow(c("", "Self-paid wages", "Other wages", "wage $\\geq$\\$10K")) +
  TexMidrule(list(c(2, 4))) +
  TexRow("Women, $t = -1$") / TexRow(dt[X == "m1" & sex == "Female"]$b_fmt, space = 3) +
  TexRow("Women, $t = 0$") / TexRow(dt[X == "p0" & sex == "Female"]$b_fmt, space = 3) +
  TexRow("Women, $t > 0$ avg.") / TexRow(dt[X == "agg_post" & sex == "Female"]$b_fmt) +
  TexMidrule(list(c(2, 4))) +
  TexRow("$\\bar{Y}$, female owners") /
  TexRow(dt[X == "dep_mean" & sex == "Female"]$b, dec = col_dec) +
  TexRow("$N$ female owners") /
  TexRow(dt[X == "n_owners" & sex == "Female"]$b, dec = 0)

tt_men <-
  TexRow("Men, $t = -1$") / TexRow(dt[X == "m1" & sex == "Male"]$b_fmt, space = 3) +
  TexRow("Men, $t = 0$") / TexRow(dt[X == "p0" & sex == "Male"]$b_fmt, space = 3) +
  TexRow("Men, $t > 0$ avg.") / TexRow(dt[X == "agg_post" & sex == "Male"]$b_fmt) +
  TexMidrule(list(c(2, 4))) +
  TexRow("$\\bar{Y}$, male owners") / 
  TexRow(dt[X == "dep_mean" & sex == "Male"]$b, dec = col_dec) +
  TexRow("$N$ male owners") / 
  TexRow(dt[X == "n_owners" & sex == "Male"]$b, dec = 0)

tt <- tt_women + TexMidrule() + tt_men

TexSave(tab = tt, positions = c("r", rep("c", 3)), 
        filename = "labor_responses", output_path = "./exhibits")

# ===========================================================================
# Figure A1: ownership share (ownership_share.pdf)
# ===========================================================================

dt <- fread("estimates/admin/regression/ownership_pct_0618_male0.csv")
dt[, sex := "Women"]
dt_agg <- dt[aggregation == "est_agg" & X == "agg_post"]
dt_agg[, event_time := 11.5]
dt <- dt[aggregation == "est_es"]
dt[, event_time := event_time - 6]

dt2 <- fread("estimates/admin/regression/ownership_pct_0618_male1.csv")
dt2[, sex := "Men"]
dt2_agg <- dt2[aggregation == "est_agg" & X == "agg_post"]
dt2_agg[, event_time := 11.5]
dt2 <- dt2[aggregation == "est_es"]
dt2[, event_time := event_time - 6]

dt <- rbind(dt, dt2)
dt_agg <- rbind(dt_agg, dt2_agg)

# construct the labels
dt_agg[, `:=`(
  b_fmt  = sub("^\\+", "", sprintf("%+.2f", b)), 
  se_fmt = sprintf("%.2f", se) 
)]

dt_agg[, lab := sprintf(
  "'%s:'~ widehat(beta)['post'] == '%s (%s)'",
  sex, b_fmt, se_fmt
)]

legend_labs <- lapply(dt_agg$lab, function(lbl) parse(text = lbl))

ggplot(dt, aes(x = event_time, y = b, color = sex, fill = sex, shape = sex)) +
  geom_point(size = 2, position=position_dodge(width=0.4)) +
  geom_vline(aes(xintercept = 0), linetype = "dotted", color = "red") +
  geom_vline(aes(xintercept = 10.75), linetype = "solid", color = "black", linewidth = 0.25) +
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "black") +
  geom_errorbar(aes(ymin = ll, ymax = ul), width = 0, linewidth = 0.3,
                position=position_dodge(width=0.4)) +
  scale_shape_manual(breaks = c("Women", "Men"), values = c(16, 17), labels = legend_labs) +
  scale_color_manual(breaks = c("Women", "Men"), values = c("#B22222", "#4682B4"), labels = legend_labs) +
  scale_fill_manual(breaks = c("Women", "Men"), values = c("#B22222", "#4682B4"), labels = legend_labs) +
  # manually add the avg estimates
  geom_errorbar(data = dt_agg[sex == "Women"],
                aes(x = event_time + 0.1, ymin = ll, ymax = ul), width = 0.15, linewidth = 0.3,
                position=position_dodge(width=0.4), color = "#B22222") +
  geom_point(data = dt_agg[sex == "Women"],
             aes(x = event_time + 0.1, y = b), color = "#B22222", shape = 21, size = 2,
             fill = "white", position=position_dodge(width=0.4)) + 
  geom_errorbar(data = dt_agg[sex == "Men"],
                aes(x = event_time - 0.1, ymin = ll, ymax = ul), width = 0.15, linewidth = 0.3,
                position=position_dodge(width=0.4), color = "#4682B4") +
  geom_point(data = dt_agg[sex == "Men"],
             aes(x = event_time - 0.1, y = b), color = "#4682B4", shape = 24, size = 2,
             fill = "white", position=position_dodge(width=0.4)) + 
  ylim(-0.2, 0.2) +
  scale_x_continuous(breaks = c(seq(-5, 10, 1), 11.5), expand = c(0.01, 0.01),
                     labels = c("-5", "-4", "-3", "-2", "-1", "0",
                                "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "Avg, 1-10"),
                     limits = c(-5.1, 12.1)) +
  labs(x = "Time since first birth (years)",
       y = "Ownership share",
       color = NULL, fill = NULL, shape = NULL) +
  theme_bw(base_size = 12) +
  theme(text = element_text(family = "Helvetica"),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        legend.position = "inside",
        legend.position.inside = c(0.01, 0.01),
        legend.justification = c(0, 0),
        legend.text = element_text(size = 8.3),
        legend.box.margin = margin(5, 5, 5, 5),
        legend.margin = margin(0, 0, 5, 0))
ggsave("exhibits/ownership_share.pdf", width = 180, height = 100, units = "mm")

# ===========================================================================
# Table A3: robustness to trimming thresholds (firm_responses_trimming.tex)
# ===========================================================================

dt_all <- data.table()
for (thresh in c("5", "25")) {
  
dt <- data.table()

# profits (level)
dt_tmp <- fread(sprintf("estimates/admin/robustness/trim_%s/profit_before_self_trim_ATT_male0_trim%s.csv", thresh, thresh))
dt_tmp[, outcome := "Profits"]
dt_tmp[, sex := "Female"]
dt <- rbind(dt, dt_tmp)

dt_tmp <- fread(sprintf("estimates/admin/robustness/trim_%s/profit_before_self_trim_ATT_male1_trim%s.csv", thresh, thresh))
dt_tmp[, outcome := "Profits"]
dt_tmp[, sex := "Male"]
dt <- rbind(dt, dt_tmp)

# total value added
dt_tmp <- fread(sprintf("estimates/admin/robustness/trim_%s/value_added_trim_ATT_male0_trim%s.csv", thresh, thresh))
dt_tmp[, outcome := "Value added"]
dt_tmp[, sex := "Female"]
dt <- rbind(dt, dt_tmp)

dt_tmp <- fread(sprintf("estimates/admin/robustness/trim_%s/value_added_trim_ATT_male1_trim%s.csv", thresh, thresh))
dt_tmp[, outcome := "Value added"]
dt_tmp[, sex := "Male"]
dt <- rbind(dt, dt_tmp)

# Num workers
dt_tmp <- fread(sprintf("estimates/admin/robustness/trim_%s/n_emp_excl_owners_trim_ATT_male0_trim%s.csv", thresh, thresh))
dt_tmp[, outcome := "Employees"]
dt_tmp[, sex := "Female"]
dt <- rbind(dt, dt_tmp, fill = T)

dt_tmp <- fread(sprintf("estimates/admin/robustness/trim_%s/n_emp_excl_owners_trim_ATT_male1_trim%s.csv", thresh, thresh))
dt_tmp[, outcome := "Employees"]
dt_tmp[, sex := "Male"]
dt <- rbind(dt, dt_tmp, fill = T)

dt_tmp <- fread(sprintf("estimates/admin/robustness/trim_%s/owner_salaries_trim_ATT_male0_trim%s.csv", thresh, thresh))
dt_tmp[, outcome := "Owner salaries"]
dt_tmp[, sex := "Female"]
dt <- rbind(dt, dt_tmp, fill = T)

dt_tmp <- fread(sprintf("estimates/admin/robustness/trim_%s/owner_salaries_trim_ATT_male1_trim%s.csv", thresh, thresh))
dt_tmp[, outcome := "Owner salaries"]
dt_tmp[, sex := "Male"]
dt <- rbind(dt, dt_tmp, fill = T)

dt <- dt[aggregation == "summary" | 
           (aggregation == "est_es" & X %in% c("m1", "p0")) | 
           X == "agg_post" |
           (outcome == "Profits" & ((aggregation == "est_pct" & X %in% c("pct_m1", "pct_p0")) | aggregation == "est_pct_agg"))]
dt[(outcome == "Profits" & ((aggregation == "est_pct" & X %in% c("pct_m1", "pct_p0")) | aggregation == "est_pct_agg")),
   `:=` (outcome = "Profits (pct.)", X = gsub("pct_", "", X))]

# get the means in t = -1 for everything except "sold firm"
dt_means <- fread(sprintf("estimates/admin/robustness/trim_%s/pretreat_means_trim%s.csv", thresh, thresh))

dt_means <- melt(dt_means, id.vars = "male")
dt_means <- merge(dt_means, data.table(variable = c("profit_before_self_trim", 
                                                    "owner_salaries_trim", 
                                                    "value_added_trim",
                                                    "n_emp_excl_owners_trim"),
                                       outcome = c("Profits",
                                                   "Owner salaries", 
                                                   "Value added",
                                                   "Employees")))
dt_means[, sex := ifelse(male == 1, "Male", "Female")]
dt_means <- dt_means[, .(outcome, pre_mean = value, sex)]

# sort columns and merge on the means
dt <- merge(dt, data.table(outcome = c("Profits (pct.)",
                                       "Profits", 
                                       "Owner salaries", 
                                       "Value added",
                                       "Employees"),
                           col_ord = 1:5),
            by = "outcome")
dt <- merge(dt, dt_means, by = c("outcome", "sex"), all.x = T)
dt[X == "dep_mean", b := pre_mean]
setkey(dt, col_ord)
dt[, trim := thresh]

dt_all <- rbind(dt_all, dt)
}

# Num. obs for profit (pct.) is same as profit (level)
dt_n <- dt_all[X == "n_owners" & outcome == "Profits"]
dt_n[, outcome := "Profits (pct.)"]
dt_all <- rbind(dt_all, dt_n)

setkey(dt_all, trim, col_ord)
dt_all[is.na(pvalue), pvalue := 0.999]

col_dec <- c(3, 0, 0, 0, 3)

tt_women <- 
  TexRow("") / TexRow(1:5, dec = 0, surround = "(%s)", space = 3) +
  TexRow(c("", "Profits,", "Profits,", "Value", "Owner", "")) +
  TexRow(c("", "pct. change", "level", "added", "salaries", "Employees")) +
  TexMidrule() +
  TexRow(c("\\emph{Panel A: 2.5\\% trimming}", rep("", 5))) +
  TexRow("$t = -1$") / TexRow(dt_all[X == "m1" & sex == "Female" & trim == "25"]$b,
                                     dec = col_dec,
                                     pvalues = dt_all[X == "m1" & sex == "Female" & trim == "25"]$pvalue) +
  TexRow("") / TexRow(dt_all[X == "m1" & sex == "Female" & trim == "25"]$se, dec = col_dec, se = T,
                      space = 3)  +
  TexRow("$t = 0$") / TexRow(dt_all[X == "p0" & sex == "Female" & trim == "25"]$b,
                                    dec = col_dec,
                                    pvalues = dt_all[X == "p0" & sex == "Female" & trim == "25"]$pvalue) +
  TexRow("") / TexRow(dt_all[X == "p0" & sex == "Female" & trim == "25"]$se, dec = col_dec, se = T,
                      space = 3) +
  TexRow("$t > 0$ avg.") / TexRow(dt_all[X == "agg_post" & sex == "Female" & trim == "25"]$b,
                                         dec = col_dec,
                                         pvalues = dt_all[X == "agg_post" & sex == "Female" & trim == "25"]$pvalue) +
  TexRow("") / TexRow(dt_all[X == "agg_post" & sex == "Female" & trim == "25"]$se, dec = col_dec, se = T) +
  TexMidrule(list(c(2, 6))) +
  TexRow(c("Dep. var. mean")) / 
  TexRow(c(1, dt_all[X == "dep_mean" & sex == "Female" & trim == "25"]$b), dec = col_dec) +
  TexRow("$N$ owners") / 
  TexRow(dt_all[X == "n_owners" & sex == "Female" & trim == "25"]$b, dec = 0) +
  TexMidrule() +
  TexRow(c("\\emph{Panel B: 5\\% trimming}", rep("", 5))) +
  TexRow("$t = -1$") / TexRow(dt_all[X == "m1" & sex == "Female" & trim == "5"]$b,
                              dec = col_dec,
                              pvalues = dt_all[X == "m1" & sex == "Female" & trim == "5"]$pvalue) +
  TexRow("") / TexRow(dt_all[X == "m1" & sex == "Female" & trim == "5"]$se, dec = col_dec, se = T,
                      space = 3)  +
  TexRow("$t = 0$") / TexRow(dt_all[X == "p0" & sex == "Female" & trim == "5"]$b,
                             dec = col_dec,
                             pvalues = dt_all[X == "p0" & sex == "Female" & trim == "5"]$pvalue) +
  TexRow("") / TexRow(dt_all[X == "p0" & sex == "Female" & trim == "5"]$se, dec = col_dec, se = T,
                      space = 3) +
  TexRow("$t > 0$ avg.") / TexRow(dt_all[X == "agg_post" & sex == "Female" & trim == "5"]$b,
                                  dec = col_dec,
                                  pvalues = dt_all[X == "agg_post" & sex == "Female" & trim == "5"]$pvalue) +
  TexRow("") / TexRow(dt_all[X == "agg_post" & sex == "Female" & trim == "5"]$se, dec = col_dec, se = T) +
  TexMidrule(list(c(2, 6))) +
  TexRow(c("Dep. var. mean")) / 
  TexRow(c(1, dt_all[X == "dep_mean" & sex == "Female" & trim == "5"]$b), dec = col_dec) +
  TexRow("$N$ owners") / 
  TexRow(dt_all[X == "n_owners" & sex == "Female" & trim == "5"]$b, dec = 0)

TexSave(tab = tt_women, positions = c("r", rep("c", 5)), 
        filename = "firm_responses_trimming", output_path = "./exhibits")

# ===========================================================================
# Figure A2: profit responses by firm FE, older firms (profit_by_fe_3p.pdf)
# ===========================================================================

# at least 3 years old
dt_dec <- data.table()
for (d in 1:4) {
  dt_temp <- fread(paste0("estimates/admin/fe_quartiles/3periods/profit_before_self_trim_quartile", d, "_male0_ATT_3p.csv"))
  dt_temp[, decile := d]
  dt_temp[, sex := "Women"]
  dt_dec <- rbind(dt_dec, dt_temp)
  
  dt_temp <- fread(paste0("estimates/admin/fe_quartiles/3periods/profit_before_self_trim_quartile", d, "_male1_ATT_3p.csv"))
  dt_temp[, decile := d]
  dt_temp[, sex := "Men"]
  dt_dec <- rbind(dt_dec, dt_temp)
}

dt_dec_agg <- dt_dec[aggregation == "est_pct_agg" & X == "pct_agg_post"]
dt_dec_agg$event_time <- NULL
dt_dec_agg[, event_time := 11.5]

dt_dec <- dt_dec[aggregation == "est_pct"]
dt_dec[, X := gsub("pct_", "", X)]
dt_dec[, event_time := ifelse(grepl("m", X), -1, 1) * as.integer(gsub("m|p", "", X))]

# prep sorted data for later plots
dt_dec_plt <- rbindlist(list(dt_dec[event_time == 0],
                             dt_dec_agg),
                        use.names = T)
dt_dec_plt[, event_time_lab := ifelse(event_time == 0, 
                                      "'Birth year ('*italic(t) == 0*')'",
                                      "'Post-period average ('*italic(t) %in% '[1, 10])'")]

ggplot(dt_dec_plt,
       aes(x = decile, y = b, color = sex, shape = sex)) +
  facet_wrap(~event_time_lab, ncol = 1,
             labeller = label_parsed) +
  geom_point(position = position_dodge(0.5)) +
  geom_errorbar(aes(ymin = ll, ymax = ul), width = 0, linewidth = 0.5,
                position = position_dodge(0.5)) + 
  geom_hline(aes(yintercept = 0), color = "red") +
  scale_y_continuous(breaks = seq(-0.5, 0.5, 0.25)) +
  scale_color_manual(breaks = c("Women", "Men"), values = c("#B22222", "#4682B4")) +
  scale_shape_manual(breaks = c("Women", "Men"), values = c(16, 17)) +
  scale_x_continuous(breaks = seq(1, 4, 1),
                     labels = c("10-30", "30-50", "50-70", "70-90")) +
  labs(x = "Firm fixed effect (percentile)",
       y = "Profit response (pct. change)", color = NULL, shape = NULL) +
  coord_flip() +
  theme_bw(base_size = 12) +
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        legend.position = "inside",
        legend.position.inside = c(0.99, 0.99),
        legend.justification = c(1, 1))
ggsave("exhibits/profit_by_fe_3p.pdf",
       width = 150, height = 130, units = "mm")
